1 Introduction

The new peak calling round applied on the previous notebook significantly increased the number of the features identified in our dataset. Therefore, we must need to repeat the standard downstream analysis, including data normalization, dimensionality reduction analysis and batch correction to account for this change in the number of features detected.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(SeuratWrappers)
library(Signac)
library(harmony)
library(tidyverse)

set.seed(1234)

2.2 Parameters

# Paths
path_to_obj <- here::here("scATAC-seq/results/R_objects/8.1.tonsil_atac_integrated_with_multiome_annotated_level1_new_peakcalling.rds")
path_to_save_obj <- here::here("scATAC-seq/results/R_objects/8.2.tonsil_peakcalling_annotation_level1_integrated.rds")
path_tmp_dir <- here::here("scATAC-seq/4-clustering/1-level_1/tmp/")

path_to_save_dimred_uncorrect <- str_c(path_tmp_dir, "batch_uncorrected_lsi.rds", sep = "")
path_to_save_dimred_correct <- str_c(path_tmp_dir, "batch_corrected_lsi.rds", sep = "")
path_to_save_confounders_df <- str_c(path_tmp_dir, "confounders_df.rds", sep = "") 

2.3 Load data

tonsil <- readRDS(path_to_obj)
tonsil
## An object of class Seurat 
## 270784 features across 101076 samples within 1 assay 
## Active assay: peaks_macs (270784 features, 0 variable features)
##  1 dimensional reduction calculated: umap

3 Visualize UMAP without batch effect correction

# Process Seurat object
tonsil <- tonsil %>%
  RunTFIDF() %>% 
  FindTopFeatures(min.cutoff = "q0") %>%
  RunSVD() %>%
  RunUMAP(reduction = "lsi", dims = 2:40)

DepthCor(tonsil)

# Visualize UMAP
confounders <- c("library_name", "sex", "age_group", "hospital", "assay")
umaps_before_integration <- purrr::map(confounders, function(x) {
  p <- DimPlot(tonsil, group.by = x, pt.size = 0.1)
  p
})
names(umaps_before_integration) <- confounders
print("UMAP colored by GEM:")
## [1] "UMAP colored by GEM:"
umaps_before_integration$library_name + NoLegend()

print("UMAP colored by sex, age group, cell hashing status, sampling center and assay:")
## [1] "UMAP colored by sex, age group, cell hashing status, sampling center and assay:"
umaps_before_integration[2:length(umaps_before_integration)]
## $sex

## 
## $age_group

## 
## $hospital

## 
## $assay

4 Run and visualize Harmony’s integration

tonsil <- RunHarmony(
  object = tonsil,
  group.by.vars = "gem_id", 
  reduction = "lsi",
  dims = 2:40,
  assay.use = "peaks_macs",
  project.dim = FALSE
)

tonsil <- RunUMAP(tonsil, dims = 2:40, reduction = "harmony")

umaps_after_integration <- purrr::map(confounders, function(x) {
  p <- DimPlot(tonsil, group.by = x, pt.size = 0.1)
  p
})
names(umaps_after_integration) <- confounders
print("UMAP colored by GEM:")
## [1] "UMAP colored by GEM:"
umaps_after_integration$library_name + NoLegend()

print("UMAP colored by sex, age group, cell hashing status, sampling center and assay:")
## [1] "UMAP colored by sex, age group, cell hashing status, sampling center and assay:"
umaps_after_integration[2:length(umaps_before_integration)]
## $sex

## 
## $age_group

## 
## $hospital

## 
## $assay

5 Save

# Save integrated Seurat object
saveRDS(tonsil, path_to_save_obj)


# Save PCA matrices to compute the Local Inverse Simpson Index (LISI)
confounders_df <- tonsil@meta.data[, confounders]
saveRDS(confounders_df, path_to_save_confounders_df)
saveRDS(
  tonsil@reductions$lsi@cell.embeddings[, 2:40],
  path_to_save_dimred_uncorrect
)
saveRDS(
  tonsil@reductions$harmony@cell.embeddings[, 2:40],
  path_to_save_dimred_correct
)

6 Session Information

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Tonsil_atlas/lib/libopenblasp-r0.3.10.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.0        stringr_1.4.0        dplyr_1.0.2          purrr_0.3.4          readr_1.4.0          tidyr_1.1.2          tibble_3.0.4         ggplot2_3.3.2        tidyverse_1.3.0      harmony_1.0          Rcpp_1.0.5           Signac_1.1.0.9000    SeuratWrappers_0.3.0 Seurat_3.9.9.9010    BiocStyle_2.16.1    
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.1              SnowballC_0.7.0             rtracklayer_1.48.0          GGally_2.0.0                bit64_4.0.5                 knitr_1.30                  irlba_2.3.3                 DelayedArray_0.14.0         data.table_1.13.2           rpart_4.1-15                RCurl_1.98-1.2              AnnotationFilter_1.12.0     generics_0.1.0              BiocGenerics_0.34.0         GenomicFeatures_1.40.1      cowplot_1.1.0               RSQLite_2.2.1               RANN_2.6.1                  future_1.20.1               bit_4.0.4                   spatstat.data_1.4-3         xml2_1.3.2                  lubridate_1.7.9             httpuv_1.5.4                SummarizedExperiment_1.18.1 assertthat_0.2.1            xfun_0.18                   hms_0.5.3                   evaluate_0.14               promises_1.1.1              fansi_0.4.1                 progress_1.2.2              dbplyr_1.4.4                readxl_1.3.1                igraph_1.2.6                DBI_1.1.0                   htmlwidgets_1.5.2           reshape_0.8.8               stats4_4.0.3                ellipsis_0.3.1              RSpectra_0.16-0             backports_1.2.0            
##  [43] bookdown_0.21               biomaRt_2.44.4              deldir_0.2-3                vctrs_0.3.4                 Biobase_2.48.0              remotes_2.2.0               here_1.0.1                  ensembldb_2.12.1            ROCR_1.0-11                 abind_1.4-5                 withr_2.3.0                 ggforce_0.3.2               BSgenome_1.56.0             checkmate_2.0.0             sctransform_0.3.1           GenomicAlignments_1.24.0    prettyunits_1.1.1           goftest_1.2-2               cluster_2.1.0               lazyeval_0.2.2              crayon_1.3.4                pkgconfig_2.0.3             labeling_0.4.2              tweenr_1.0.1                GenomeInfoDb_1.24.0         nlme_3.1-150                ProtGenerics_1.20.0         nnet_7.3-14                 rlang_0.4.8                 globals_0.13.1              lifecycle_0.2.0             miniUI_0.1.1.1              BiocFileCache_1.12.1        modelr_0.1.8                rsvd_1.0.3                  dichromat_2.0-0             cellranger_1.1.0            rprojroot_2.0.2             polyclip_1.10-0             matrixStats_0.57.0          lmtest_0.9-38               graph_1.66.0               
##  [85] Matrix_1.2-18               ggseqlogo_0.1               zoo_1.8-8                   reprex_0.3.0                base64enc_0.1-3             ggridges_0.5.2              png_0.1-7                   viridisLite_0.3.0           bitops_1.0-6                KernSmooth_2.23-17          Biostrings_2.56.0           blob_1.2.1                  parallelly_1.21.0           jpeg_0.1-8.1                S4Vectors_0.26.0            scales_1.1.1                memoise_1.1.0               magrittr_1.5                plyr_1.8.6                  ica_1.0-2                   zlibbioc_1.34.0             compiler_4.0.3              RColorBrewer_1.1-2          fitdistrplus_1.1-1          Rsamtools_2.4.0             cli_2.1.0                   XVector_0.28.0              listenv_0.8.0               patchwork_1.1.0             pbapply_1.4-3               htmlTable_2.1.0             Formula_1.2-4               MASS_7.3-53                 mgcv_1.8-33                 tidyselect_1.1.0            stringi_1.5.3               yaml_2.2.1                  askpass_1.1                 latticeExtra_0.6-29         ggrepel_0.8.2               grid_4.0.3                  VariantAnnotation_1.34.0   
## [127] fastmatch_1.1-0             tools_4.0.3                 future.apply_1.6.0          parallel_4.0.3              rstudioapi_0.12             foreign_0.8-80              lsa_0.73.2                  gridExtra_2.3               farver_2.0.3                Rtsne_0.15                  digest_0.6.27               BiocManager_1.30.10         shiny_1.5.0                 GenomicRanges_1.40.0        broom_0.7.2                 later_1.1.0.1               RcppAnnoy_0.0.16            OrganismDbi_1.30.0          httr_1.4.2                  AnnotationDbi_1.50.3        ggbio_1.36.0                biovizBase_1.36.0           colorspace_2.0-0            rvest_0.3.6                 XML_3.99-0.3                fs_1.5.0                    tensor_1.5                  reticulate_1.18             IRanges_2.22.1              splines_4.0.3               uwot_0.1.8.9001             RBGL_1.64.0                 RcppRoll_0.3.0              spatstat.utils_1.17-0       plotly_4.9.2.1              xtable_1.8-4                jsonlite_1.7.1              spatstat_1.64-1             R6_2.5.0                    Hmisc_4.4-1                 pillar_1.4.6                htmltools_0.5.0            
## [169] mime_0.9                    glue_1.4.2                  fastmap_1.0.1               BiocParallel_1.22.0         codetools_0.2-17            lattice_0.20-41             curl_4.3                    leiden_0.3.5                openssl_1.4.3               survival_3.2-7              rmarkdown_2.5               munsell_0.5.0               GenomeInfoDbData_1.2.3      haven_2.3.1                 reshape2_1.4.4              gtable_0.3.0